#!/usr/bin/pypy
# -*- coding: utf-8 -*-
from __future__ import division
import sys, argparse
import csv
import time as t
import datetime as d
'''
Number of days since the user joined the course.
Time elapsed between joining the course and the user's most recent activity.
'''

def parse_args():
    """Parse the command line and return its arguments as a dict.

    Two positional arguments are expected: ``log`` and ``output``.
    When the script is started without any argument, ``-h`` is appended to
    ``sys.argv`` so that argparse prints the usage text and exits.

    Returns:
        dict: mapping with the keys ``'log'`` and ``'output'``.
    """
    invoked_bare = len(sys.argv) == 1
    if invoked_bare:
        # Turn "no arguments" into an explicit request for help.
        sys.argv.extend(['-h'])

    cli = argparse.ArgumentParser()
    for positional in ('log', 'output'):
        cli.add_argument(positional)
    return vars(cli.parse_args())

# Resolve the input log path and the output path from the command line.
args = parse_args()
log = args['log']
output = args['output']

# Rows of the input log, produced lazily by the csv module.
reader = csv.reader(open(log))

# strptime layout used to parse the log's time column.
time_format = "%Y-%m-%dT%H:%M:%S"

# Accumulator filled while scanning the log (one entry per key).
user_latest_time = dict()

class DateSpan(object):
    """Mutable pair holding a lower and an upper date bound.

    Both bounds start out as ``None``, meaning nothing has been recorded yet.
    """

    def __init__(self):
        # Nothing observed so far: neither bound is known.
        self.min_date = self.max_date = None

# Scan the log once, tracking for every enrollment the earliest and the
# latest activity timestamp (truncated to the hour).
line_no = 0  # stays 0 when the log has no rows at all
for line_no, row in enumerate(reader, 1):
    # Every row must have exactly these seven fields.
    enrollment_id, username, course_id, time, source, event, _object = row
    if line_no == 1:
        # The first row is the header.
        continue

    parsed = t.strptime(time, time_format)
    # Minutes and seconds are dropped on purpose: only hour resolution is kept.
    hour_stamp = d.datetime(
        year=parsed.tm_year,
        month=parsed.tm_mon,
        day=parsed.tm_mday,
        hour=parsed.tm_hour,
    )

    # Spans are keyed by enrollment id.
    try:
        span = user_latest_time[enrollment_id]
    except KeyError:
        span = user_latest_time[enrollment_id] = DateSpan()

    # Widen the span whenever this row falls outside the current bounds.
    if span.min_date is None or hour_stamp < span.min_date:
        span.min_date = hour_stamp
    if span.max_date is None or hour_stamp > span.max_date:
        span.max_date = hour_stamp


# Emit one row per key: the key itself and the span between its earliest and
# latest recorded date.
# NOTE(review): the column is labelled "n_days", but the difference in seconds
# is divided by 3600, so the value written is whole hours. Confirm which unit
# downstream consumers expect before changing either the label or the divisor.
#
# The file is opened in a `with` block so it is flushed and closed
# deterministically instead of relying on garbage collection, which is not
# prompt on PyPy.
with open(output, 'w') as out_file:
    writer = csv.writer(out_file)
    writer.writerow(["enrollment_id", "n_days"])

    for key, datespan in user_latest_time.items():
        elapsed = datespan.max_date - datespan.min_date
        n_days = int(elapsed.total_seconds() / 3600)
        writer.writerow([key, n_days])

